{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用GBRT算法建立Sepal.Length、Sepal.Width、Petal.Length对Petal.Width的回归模型。首先，建立函数gbrt_build，使用训练数据以构建GBRT模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import tree\n",
    "import numpy as np\n",
    "\n",
    "def gbrt_build(x,y,consame=5,maxiter=10000,shrinkage=0.0005):\n",
    "    \"\"\"\n",
    "    建立函数构建GBRT模型\n",
    "    x : 输入数据，解释变量\n",
    "    y : 输出数据，响应变量\n",
    "    consame : 当连续consame次得到的残差平方和相等时算法终止\n",
    "    maxiter : 迭代次数的上限\n",
    "    shrinkage : 缩放因子\n",
    "    \"\"\"\n",
    "    # 使平方损失函数最小化的常数值为对应数据的平均值，即以均值初始化f0\n",
    "    f0 = np.mean(y)\n",
    "    #初始化变量\n",
    "    rss = []\n",
    "    model_list = [f0]\n",
    "    # 进入循环，当连续consame次，得到的残差平方和相等或超过最大迭代次数时终止算法\n",
    "    for i in range(maxiter):\n",
    "        # 计算负梯度，当损失函数为平方损失函数时，负梯度即为残差\n",
    "        revals = y - f0\n",
    "        # 根据残差学习一棵回归树，设置分割点满足的最小样本量为 30\n",
    "        clf = tree.DecisionTreeRegressor(min_samples_leaf=30)\n",
    "        clf = clf.fit(x, revals)\n",
    "        # 更新回归树，并生成估计结果\n",
    "        model_list.append(clf)\n",
    "        f0 = f0 + shrinkage*clf.predict(x)\n",
    "        # 统计残差平方和\n",
    "        rss.append(np.sum((f0 - y)**2))\n",
    "        # 判断是否满足终止条件\n",
    "        if len(rss) >= consame and np.std(rss[(len(rss)-consame+1):len(rss)]) == 0 :\n",
    "            print(\"共迭代\",m+1,\"次，满足终止条件退出迭代！\")\n",
    "            break\n",
    "    return rss,model_list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "在使用iris数据建立GBRT模型之前，需要对其进行简单处理，然后调用函数gbrt_build，完成模型的训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    10000.000000\n",
       "mean         9.443136\n",
       "std         11.613604\n",
       "min          2.954312\n",
       "25%          3.230344\n",
       "50%          3.919855\n",
       "75%          9.398138\n",
       "max         59.853140\n",
       "dtype: float64"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "iris = pd.read_csv(\"http://image.cador.cn/data/iris.csv\")\n",
    "x,y = iris.drop(columns=['Species','Petal.Width']),iris['Petal.Width']\n",
    "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=1)\n",
    "rss,model_list = gbrt_build(x_train,y_train)\n",
    "\n",
    "#查看rss的统计信息\n",
    "pd.Series(rss).describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "为进一步直观呈现每次迭代残差平方和的变化，现绘制二维图形"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYEAAAEJCAYAAAByupuRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deXxV9Z3/8dcnC7uVLQJGIaC4LyARtI4CisC4FevG6GhxBqnWtta2jvvSqq2djnX0Z60LagewKoqDYFNGsSCLBEkQEYqiAi48UAIomxBC8vn9cU6ulxhIIMk9N/e8n4/HfXCWu3y+94b7vmf5fo+5OyIiEk9ZURcgIiLRUQiIiMSYQkBEJMYUAiIiMaYQEBGJsZyoC9gbnTt39oKCgqjLEBFpVkpLS9e5e15t65pVCBQUFFBSUhJ1GSIizYqZfby7ddodJCISYwoBEZEYUwiIiMSYQkBEJMYUAiIiMZbSEDCzG8xstpm9Y2ajzazAzF43szfM7FEzy05lPSIicZeyEDCzE4FTgYHhvz2B3wH3ufvAsJbzmuK1y8rKmDlzJmPHjm2KpxcRabZS2U/gbOA9YDLQDrgReAEYGa4vAk4D/rcxX7SyspKDDz6Y8vJyAC644AI6dOjQmC8hItJspXJ3UDfgROAi4GpgApDr31zQYAvQqeaDzGyMmZWYWUlZWdlev2h2djZHHHFEYn7p0qX7ULqISGZKZQhsB4rcvdzdlxN86bc3MwvXtwfW13yQuz/u7oXuXpiXV2uv5zodffTRiWmFgIjIN1IZAvOAIWaWZWbdCHYJTSPYBQTB7qLZTfHCCgERkdql8pjARKAf8GY4fw2wChhrZjnAMmBKU7ywQkBEpHYpCwF3rwJuqGXV6U392sccc0xiWiEgIvKNWHQW69mzJ61btwbgiy++YN26dRFXJCKSHmIRAllZWRx55JGJeW0NiIgEYhECoOMCIiK1UQiIiMSYQkBEJMZiGQJLlizhm47KIiLxFZsQ6NGjB23btgVg/fr1rF27NuKKRESiF5sQyMrK4qijjkrMa5eQiEiMQgB0XEBEpCaFgIhIjMU2BN59990IKxERSQ+xCoHjjjsuMf3uu+9SVVUVYTUiItGLVQgceOCBdOoUXLdm8+bNrFq1KtqCREQiFqsQMDOOP/74xPw777wTYTUiItGLVQgACgERkSSxDoFFixZFWImISPRiHQLaEhCRuItdCBx11FHk5uYCsGrVKjZu3BhxRSIi0YldCLRo0WKXC8wsXrw4wmpERKIVuxAA7RISEammEFAIiEiMKQQUAiISY7EPgSVLllBZWRlhNSIi0YllCOTl5dGtWzcAtm3bxvLlyyOuSEQkGrEMAYATTjghMV1aWhphJSIi0YltCBQWFiamS0pKIqxERCQ6sQ2Bfv36Jaa1JSAicZWTyhczswXA1nB2JfAr4MmwjmXAte6ekqO0ySGwcOFCKisryc7OTsVLi4ikjZRtCZhZCyDH3QeFtyuB3wH3ufvAsJbzUlXPgQceyIEHHgjA119/zXvvvZeqlxYRSRup3B10LNDWzF4zs9fN7CRgADA9XF8EnFrzQWY2xsxKzKykrKysUQvSLiERibtUhsB24H5gKHAN8Bcg1909XL8F6FTzQe7+uLsXunthXl5eoxakg8MiEnepPCbwAfBh+KW/3MzWASeYmYXL2gPrU1iPQkBEYi+VWwKjgIcAzCwf+A4wBTgtXH82MDuF9eyyO2jRokXs3LkzlS8vIhK5VIbAn4E2ZjYHeA64EvglcKeZzQJ2EIRCynTp0oWDDjoICHoOL1u2LJUvLyISuZTtDnL3HcDltaw6PVU11KawsJDPPvsMCHYJHXvssVGWIyKSUrHtLFYteZeQjguISNzEPgSSDw7rNFERiZvYh0DNg8MVFRURViMiklqxD4G8vDy6d+8OQHl5OUuXLo24IhGR1Il9CIB2CYlIfCkE2DUE5s+fH2ElIiKppRAATjrppMT0vHnzIqxERCS1FALAiSeeSFZW8FYsXbqUjRs3RlyRiEhqKASAdu3aJS4+7+7aJSQisaEQCJ188smJ6TfffDPCSkREUkchEPrud7+bmNZxARGJC4VAKHlLoLi4mKqqqgirERFJDYVAqGfPnnTp0gWATZs28Y9//CPiikREmp5CIGRmu+wS0nEBEYkDhUCS5F1COi4gInGgEEiiLQERiRuFQJJ+/fqRm5sLwPLly1m3bl3EFYmINC2FQJJWrVpxwgknJOaLi4sjrEZEpOkpBGrQcQERiROFQA06LiAicaIQqCE5BObPn8+OHTsirEZEpGkpBGrIz8+nV69eAGzbto0FCxZEXJGISNNRCNRi4MCBiek33ngjwkpERJqWQqAWCgERiQuFQC2SQ2Du3LlUVFREWI2ISNNRCNSioKCAHj16ALB161YWLlwYcUUiIk1DIbAbyVsDM2fOjK4QEZEmlNIQMLM2ZrbCzI4ws45mNsXMZpjZRDNrm8pa6qLjAiISB6neErgbaB9O3whMdffBwCJgTIpr2aPkEJgzZw47d+6MsBoRkaaRshAwsxOBjsDicNFpQFE4XQScupvHjTGzEjMrKSsra/pCQ7169eKggw4CYPPmzSxatChlry0ikiopCQEzywHuA25IWtwJ+Cqc3hLOf4u7P+7uhe5emJeX17SFJjEzBg0alJh//fXXU/baIiKpkqotgf8Axrt78tjMXwL7hdPtgfUpqqXezjjjjMT0a6+9FmElIiJNI1UhMBwYZWYzgT7AOGAJMCxcfzYwO0W11NuZZ56ZmJ4zZw5ff/11hNWIiDS+lISAu5/m7oPcfRDBQeArCA4MXxwGw9HA46moZW/k5+dz1FFHAVBeXs7s2WmXUyIiDZLyfgJhGLzn7uvc/exw/mJ335rqWuojeWtAu4REJNOos1gdFAIikskUAnUYOHBg4rrDixcv5vPPP4+4IhGRxqMQqEO7du12udDM9OnTI6xGRKRxKQTqQbuERCRTKQTqoWYIuHuE1YiINB6FQD3069ePDh06ALBmzRqWLl0acUUiIo1DIVAP2dnZ6j0sIhlJIVBPybuEpk2bFmElIiKNRyFQT8OHD09Mz5w5k82bN0dYjYhI41AI1FP37t057rjjANixY4dOFRWRjKAQ2AvnnHNOYvqVV16JsBIRkcahENgLySHw17/+laqqqgirERFpuH0OATPrYGYtG7OYdNe/f386d+4MwBdffEFpaWnEFYmINEydIWBm/2xmk8ysSzh/kJktANYBX5rZ3U1dZLrIzs7mrLPOSsxrl5CINHd7DAEzGwi8AhwEtAgXjwOOAX5EcF2AK83s0qYsMp3ouICIZJK6tgRuAh519wHu/qmZ9QYGAY+5+2Pu/iLwc+C6Jq4zbQwdOpScnBwAFi5cyGeffRZxRSIi+66uEOgDPJw0fybgwEtJy4qBwxu5rrS1//7773IB+smTJ0dXjIhIA9UVAu2A5F5Rg4Fygi/+aq2A3EauK619//vfT0xPmjQpwkpERBqmrhD4EDgBwMxaA0OAue6+I+k+ZwIfNE156WnEiBGYGQCzZs2irKws4opERPZNXSHwJ+AhM/shMB7YH3gEwAJDgV8BzzdplWmmW7duiQvNVFVV8fLLL0dckYjIvqkrBJ4A/gL8BvgucJO7/2+47kFgGjAX+K8mqzBNXXDBBYnpl156aQ/3FBFJX7avF0gxs+OAlu6+oHFL2r3CwkIvKSlJ1cvt0apVq+jZsycAubm5rF27lvbt20dclYjIt5lZqbsX1rauIcNGfAosbsDjm7WCggL69esHQEVFBVOnTo24IhGRvaceww2QvEvo2WefjbASEZF9ox7DDTBy5MjE9KuvvqqzhESk2VGP4Qbo2bNn4iyhyspKXnjhhYgrEhHZOynrMWxmWWb2hJnNMbPZZnasmRWY2etm9oaZPWpm2XvdgohddtlliekJEyZEWImIyN5LZY/hc4Fsd/8n4A6C005/B9zn7gPDWs6rZ91p46KLLkqMJTRv3jw+/PDDiCsSEam/lPUYdveXgTHhbAHwFjAAqL5OYxFwan0LTxd5eXm7DC89fvz4CKsREdk7Ke0x7O47zexp4L+B9UCuf9NRYQvQqeZjzGyMmZWYWUm6Hni9/PLLE9Pjxo3TFcdEpNlojB7Db7IXPYbd/UrgMOBWoI1VD8ID7QmCoeb9H3f3QncvzMvLq+/LpNQ555yT6Ci2atUq5syZE3FFIiL1s8cQ8MAt7t7J3Q909/9MWj2WYHfOCODaul7IzK4ws5vC2a8JjjX8HTgtXHY2MHtvG5AOWrVqxSWXXJKYf/rppyOsRkSk/urTWexnZlZqZsVm9pPq5e6+GNhOsCVwfz1eaxJwopnNItj/fx1wA3BnuGwHMGUf2pAWrrzyysT0xIkT2bRpU4TViIjUT12dxX4L/AH4CigDfm9mV4XrrgdKgJ4Encb2yN23uvsF7n6au5/q7v/n7ivc/fRw2Q/dvbLBLYpI//79OeaYYwD4+uuvee655yKuSESkbnVtCVwK3OzuZ7j7ucC/AFeb2a0Ev/6fAY5w92eauM60Z2aMHj06MT927NgIqxERqZ+6QqAbMDFp/mXgaOAXwIXu/m/u/mVTFdfc/Ou//istWgSjayxYsIB33nkn4opERPasrhDIATZWz7h7FbANuM7dNYh+DZ06ddrl0pPaGhCRdLevQ0kX132XeEreJTR+/Hi2bt0aYTUiIntWnxCo7aoz5Y1dSKYYPHgwhx56KAAbN27UAWIRSWv1CYH7zeyR6hvQGrg3eVm4XICsrCyuueaaxPwf//hH9vXqbSIiTW2Pl5c0s5nUviVQk7v76Y1V1O6k0+Ul92TDhg3k5+ezfft2AObOnZsYclpEJNX2dHnJnD090N0HNUlFGa5jx45ceumlPPXUUwA88MADCgERSUsNucaw7MHPfvazxPRLL73EqlWroitGRGQ3FAJN5Nhjj2XIkCEAVFVV8dBDD0VckYjItykEmtD111+fmB47dqzGExKRtKMQaELDhw/n8MODK29u3rw5cYxARCRdKASaUFZW1i7HBh566CEqK5vtGHkikoEUAk3siiuuoGPHjgCsXLmSSZMmRVyRiMg3FAJNrE2bNrt0HrvvvvvUeUxE0oZCIAWuu+46WrduDcDbb7/Nq6++GnFFIiIBhUAK5OXl7TKw3D333KOtARFJCwqBFPnlL39Jbm4uAHPmzGHGjBkRVyQiohBIme7du+9yHeJf/epXEVYjIhJQCKTQzTffTE5OMFzTrFmztDUgIpFTCKRQQUEBo0aNSszfdtttOjYgIpFSCKTYbbfdljg28Oabb1JUVBRxRSISZwqBFOvRowdXX311Yv6WW26hqqoqwopEJM4UAhG45ZZbaNOmDQCLFy/mmWeeibgiEYkrhUAEunbtyi9+8YvE/K233pq4CpmISCopBCJyww03kJeXB8Cnn36q6w2ISCQUAhHZb7/9uOuuuxLz9957L1988UV0BYlILKUsBMwsx8z+bGazzewtMzvXzArM7HUze8PMHjWz7FTVkw6uuuoqjjjiCAA2bdrErbfeGnFFIhI3qdwSuAzY4u6nAmcBDwO/A+5z94FhLeelsJ7I5ebm8sADDyTmn3rqKUpLSyOsSETiJpUhMAmo/qlbfU7kAGB6OF0EnJrCetLC8OHDOeeccwBwd6699lqdMioiKZOyEHD3Le6+0cz2A14Ebgdy/Zsus1uATjUfZ2ZjzKzEzErKyspSVW5K/eEPf6BFixYAzJ8/n7Fjx0ZckYjERUoPDJtZPsEv/7+4+zigwswsXN0eWF/zMe7+uLsXunth9dk0maZ3797ceOONifmbbrpJB4lFJCVSeWC4G/AqcKu7V//ULQFOC6fPBmanqp50c/PNN9OrVy8AvvzyS6677rqIKxKROEjllsBNBLt7bjOzmWY2E7gTuNPMZgE7gCkprCettG7dmj/96U+J+eeff56pU6dGWJGIxIE1p1EsCwsLvaSkJOoymtSoUaP4n//5HwDy8/N599136dChQ8RViUhzZmal7l5Y2zp1Fksz999/PwcccAAAq1ev5qc//WnEFYlIJlMIpJlOnTrx6KOPJuYnTJjAiy++GGFFIpLJFAJp6Pzzz+eKK65IzF999dV8/vnnEVYkIplKIZCmHnzwQQ4++GAA1q9fz+WXX05lZWXEVYlIplEIpKn27dvz9NNPU92NYvr06fz2t7+NuCoRyTQKgTR2xhlncMsttyTm77zzTmbOnBldQSKScRQCae6uu+7i1FODIZWqqqq49NJLWbt2bcRViUimUAikuZycHJ599lk6d+4MwJo1axg5ciQVFRURVyYimUAh0Azk5+czYcKExPyMGTP4+c9/HmFFIpIpFALNxLBhw/j1r3+dmH/44Yd5/PHHI6xIRDKBQqAZue2227jooosS8z/60Y947bXXIqxIRJo7hUAzYmY8/fTT9OnTB4DKykouvPBC3n333YgrE5HmSiHQzLRt25ZXXnmF/Px8ILg28bBhw1i5cmXElYlIc6QQaIby8/MpKipiv/32A4IzhoYOHaoL0YjIXlMINFPHHXccU6ZMoWXLlgB8+OGHDB8+nA0bNkRcmYg0JwqBZmzQoEE899xzZGUFH+OiRYsYNmwYGzdujLgyEWkuFALN3IgRI3jyyScT8yUlJQwbNoyvvvoqwqpEpLlQCGSAUaNG8dhjjyXm58+fz5AhQ7RrSETqpBDIEGPGjOGRRx5JzJeWljJw4EBWr14dYVUiku4UAhnkmmuu4YknnkgMP71kyRJOOeUU3n///YgrE5F0pRDIMKNHj2bChAnk5OQA8PHHH3PKKacwb968iCsTkXSkEMhAl156KVOnTqVNmzZAcGWywYMHM3HixIgrE5F0oxDIUMOHD+fvf/97Ygjq8vJyLrnkEu644w6qqqoirk5E0oVCIIMNGDCA4uJiDj/88MSyu+++m3PPPVenkIoIoBDIeIcccgjz5s1jyJAhiWVFRUUUFhbyzjvvRFiZiKQDhUAMdOjQgWnTpnHjjTcmln300UcMGDCARx55BHePsDoRiZJCICays7O57777eOGFF2jbti0QHCe49tprGTFihK5bLBJTKQ0BMzvNzGaF0x3NbIqZzTCziWbWNpW1xNWFF15IaWkpxx9/fGLZlClTOPbYY5k8eXKElYlIFFIWAmZ2I/Aw0CJcdCMw1d0HA4uAMamqJe4OP/xwiouL+clPfpJYtnbtWs4//3xGjhyprQKRGEnllsCHwAVJ86cBReF0EXBqbQ8yszFmVmJmJWVlZU1cYny0atWKhx56iGnTptGtW7fE8ueff54jjzySJ554QqeSisRAykLA3ScBFUmLOgHV5yluCedre9zj7l7o7oV5eXlNXGX8DBs2jCVLljBq1KjEsg0bNjBmzBhOOeUUFixYEF1xItLkojww/CWwXzjdHlgfYS2x1rFjR55++mmmTZtGz549E8uLi4vp378/P/jBD/jkk08irFBEmkqUITAbGBZOnx3OS4SqtwpuvfVWcnNzE8vHjRvHoYceylVXXcVHH30UYYUi0tiiDIH7gIvNbCZwNPB4hLVIqE2bNtxzzz0sXbqU8847L7G8oqKCsWPHcthhh3H55ZezbNmyCKsUkcaS0hBw91XuflI4vc7dz3b3Qe5+sbtvTWUtsme9e/fm5ZdfZvr06ZxyyimJ5VVVVUyYMIGjjz6aiy++WL2ORZo5dRaTPTrjjDOYPXs2M2fO3GXoCXfnhRdeoE+fPpx33nm89dZbEVYpIvtKISB1MjMGDhzIa6+9xrx58zjnnHN2WT916lQGDBjAmWeeyfPPP8+2bdsiqlRE9pZCQPbKSSedxNSpU1m4cCEXXHDBLuumT5/OyJEj6dq1K6NHj2bWrFnqayCS5hQCsk/69u3Liy++yJIlS7jsssvIyvrmT2nTpk08+eSTDBw4kF69enH77bezfPnyCKsVkd2x5jSCZGFhoZeUlERdhtRi1apVjB8/nvHjx/PBBx/Uep/+/ftzxRVXcMkllyQudiMiTc/MSt29sNZ1CgFpTO7O/PnzGT9+PM899xwbNmz41n1yc3M5/fTTOf/88/ne975H165dI6hUJD4UAhKJ8vJyioqKGD9+PK+88goVFRXfuo+Z0b9/f8466yzOPvts+vbtu8uuJRFpOIWARG79+vVMnDiRcePGUVxcvNv7denShaFDhzJ48GAGDx5MQUFB6ooUyVAKAUkrn3zyCZMnT2by5MnMmjWLysrK3d63R48eDBo0iEGDBnHyySdz2GGHYWYprFak+VMISNrasGEDr776KkVFRfztb39j3bp1e7x/hw4dGDBgAAMGDKBv37707duXgw8+WMEgsgcKAWkWqqqqKC0tZcaMGcyYMYPZs2ezdWvdo4l07NiRPn360KdPH4455hiOPPJIjjzySPbff/8UVC2S/hQC0ixVVFSwcOFCZsyYwdy5cykuLq5zSyFZt27dEoFwyCGH0KtXLw455BB69uyZuM6ySBwoBCQjuDsrVqxg3rx5lJaWsmjRIt5++202bty418/VpUsXevXqRc+ePTnooIPIz88nPz8/Md21a1dycnKaoBUiqacQkIzl7nz88ce8/fbbLFq0iGXLlrFs2TKWL1/Ojh079vl5s7Ky6NKlC/n5+eTl5ZGXl8cBBxyQmK55a9eunY5LSNpSCEjs7Ny5k5UrV7Js2TLef/99Vq5cyYoVK1ixYgWrVq2qtc9CQ7Ro0YL27dvv9tahQwf2339/9ttvP9q2bbvLrV27drvMZ2dnN2ptInsKAW3vSkbKycmhd+/e9O7d+1vrKisrWb16NR999BEff/wxq1ev5rPPPtvl37Vr1+7V6+3YsYO1a9fu9eNq06pVq28FRatWrWjZsmWDbi1atCA3N3eXW05OzreW7W59dna2tnYykEJAYic7O5vu3bvTvXv33d6nvLycNWvWsGbNGsrKyna5rV279lvLtm/f3mj1bd++ne3bt7N+ffpddruuoNiX9U39HDk5OWRnZ5OdnU1WVlZienfzcQs6hYBILVq2bElBQUG9eiy7O9u3b+err76q87Zlyxa2bt2a+Lf6ljyfzioqKhp9V1q6MbM6g6Ih8w15jjPPPJMRI0Y0ansVAiINZGa0bt2a1q1b061btwY9l7uzbdu2b4XE9u3bKS8vb9Ct+gu8ttvOnTvrXB+Xa0O4Ozt37mTnzp1Rl/It7dq1UwiIZDIzo02bNrRp0ybqUr6lqqpqn0OkvkHTkPW7u8/OnTuprKykqqqKysrKXW41l6X7iTJNcdKAQkBE6iUrKytxkDlTufsuwVAzJOqa35fH7M1z9uvXr9HbrBAQEQklHw+ICw3cLiISYwoBEZEYUwiIiMSYQkBEJMYUAiIiMaYQEBGJMYWAiEiMNauhpM2sDPi4AU/RGaj/pakyQ9zaHLf2gtocFw1pcw93z6ttRbMKgYYys5LdjamdqeLW5ri1F9TmuGiqNmt3kIhIjCkERERiLG4h8HjUBUQgbm2OW3tBbY6LJmlzrI4JiIjIruK2JSAiIkkUAiIiMRaLEDCzm8xsbng7Kep6GsrMcszsz2Y228zeMrNzzazAzF43szfM7FEzyw7v+wMzezO8nRcua2Vm481sppn9zcwOiLZF9WNmbcxshZkdYWYdzWyKmc0ws4lm1ja8z1Azmxd+1j8Ml5mZPWhms8L79462JfVjZjeEn/E7ZjY60z9jM8sysyfDz26emfXJ5Dab2WlmNiucbvDf8+7eqzq5e0bfgKOAWYABPYCSqGtqhDb9AHg4nO5M0IHueeDMcNnjwPlAR2AJ0Ar4DvAPoAVwDXBveN9LgQejblM9230/sAE4AvgdcFW4/BbgeoKLJC0DOgG5wNvAAcA/A8+E9/0u8HLUbalHW08EphD8UPsOcG+mf8bAcOCFcHoo8NdMbTNwI7AYKA7nG/z3XNt7VZ9a4rAlcCrwfx74GMgxs+9EXVQDTQJuDaerr/49AJgeThcRtPtEYJ67b3f3TcBygi/Q08L7JN83rZnZiQT/+ReHi2prwyHAZ+6+3t0rgDeAk5Lv6+5vAselsPR9dTbwHjA56ZbRnzGwA2hrZlnAfgRf9Jna5g+BC5LmG+Pvubb3qk5xCIFOwFdJ81vCZc2Wu29x941mth/wInA7kOvhTwC+aePu2p68PO3fDzPLAe4DbkhaXFsb6tNeCH4IpPvffjeCL7uLgKuBCWTwZxyaTfDL/j2CX7L/IEPb7O6TgIqkRY3x91zbe1WndP+P0Bi+JPhVUa09sD6iWhqNmeUTpP5f3H0cUGFmFq6ubuPu2p68vDm8H/8BjHf35HFTamtDfdoL4O5eRXrbDhS5e7m7Lyf4T90+gz9jCD7nN939MKAPQfC3zvA2V2uMv+favgPqFIcQmE2wfxEz6wlUhJuQzZaZdQNeBW5197Hh4hKCzUQIdiXMBt4CvmtmuWbWHugNvB+uG1bjvulsODDKzGYSfDmMI9hVULMNHwA9zKy9mbUgeD/mk9ReMxtIsG813c0DhoQHS7sB7YBpZO5nDNAW+DycXg9sJNgFksltrlZb7Xv791zbd0Ddoj5AkqKDMLeHb8ibwElR19MI7XmQ4D/LzKTb0cDfCQ6CPwZkh/f9d2Bu+MdzXrisNfBc+LhpQJeo27QXbZ9JsP+3M8GBw5nARKBtuP6s8HMuBn4YLssC/kjwhfIGcFjU7ahHO7OA34ftKAZOB3pl8mcMdCA49vFG2J7zMrnNQAHfHBhu8N/z7t6rum7qMSwiEmNx2B0kIiK7oRAQEYkxhYCISIwpBESamJkdlHTq3r4+R4MeL7I7CgHJSGZ2h5n9vsayI82sIJzeaWZH1PEcvzGzm8LpW8zstfCUze/V9qVsZoPN7Jgay9oDnxKcCbKn1zrKzP4tHBfmPTP7JzM7wMz+LbzLM2Z2wZ6eQ2RfKAQkUz0MnGVmyb2MfwRca2Zdw/nOZtbVzDrXfLCZ5RKchvhBuKgCaEnQk/ceYFLYkznZ6cACM7skadkAgvGOKsItguRb8vAlW4GfEHSQguCU33nAQeF8b3btYSrSKBQCkpHcfQMwCvi+BaOuGkEHm+MiSsgAAATJSURBVF8Ca4Bsgr4jawjOwa7pB8BO4OVwfjPQzt1XE4zJ0pvgXO3k17wd+DEw3sz6h4svIhjz6NNabpcnPfxUgqEhWhMMnXAIQaeoFeHwIIcCX5pZ56Sb/v9Kg9X8JSOSMdx9AXAygJldTtDrtpW7l5tZJdDb3VfUfJyZtQLuAB51953h4i8JuuLj7l+Z2feAnrW85pNmdghQaWZ5wCXAQHeflfT8rcPneyfpoaMJRsXsD5QDZxL0mB1N0Hu2PUEnoGQ9gVX1fkNEaqEQkIwTjqP+ftKigQRfrteHAZBLsBW8u90r9wIHA5+GX+hdgcOBA83sFoLenXlAVzMb4O6/SX6wu98S1vFHYKm7zzKzYQQjQi4l6AW6GViQ9JhBZjYWWAqcQjAK5GnACOCnBLuLOgAOrARGuPuqfXl/RJIpBCTjuHulmQ0h2OXzIcGumx8DPzaz55Lu+knS8d1J7n6hmf0zwbGD1eHyJ4HuBL/cWwL9wud8G/iCYAyjXYS7nk4AriIY+heC3UtLCL7kAe529/Kkx7xAEDzVwwT8DdgfuAu4mGAroQewjmBM+Xf38m0RqZVCQDKSu69KOnD7KcEB3WpXEXzRn0iw3x9guwVXonqe4LjBv4TPMwiCq14Bm4Bn3f3FOl5+PPAX4Ch3/zBc1g2YEz7npFoe8yDBlsG/E4yrvxH4OfDfYf1zw3rLgH+4+446ahCpFx1YkjiodPfP3b16hMofEvyqnwd8Hyhz96/cfS1Q6O5/rPkEHgzVu4hg19JuhVsBZwAbqgMgXHYscMweHvoewfDAPwMeIDhg/R2CkTV/TDCw2EiCQdXeqEebRepFISCxEZ7D/3eCIXcPIDgl83pgVjhcMx6M3b87U4BLzKzlHu5zGsGlDkuSlp1NcHnAS8PrQNSmLLz1Bp5Kmq9w92KCq8mdSXC20bg9vL7IXlEISCbrEP6bZ2aPEexuKQIudPed7j4Z6Euwn/21elyYezzBr/N/qV5gZreHB4+r/Qh4vvqsIjPrAPwB+BPBLqKHdvPcWQTB8Wt3N4KtgZfcfTRAeGrqGwShsLBerRepB4WAZCQzO45grPklBGPUdwVOc/dfJp32ibtvIdjNchuwx6EZ3H0N8FvgnvA8/WyCq2EdEL7miQS7l/5fOJ8HvEJwUPkugtNO+5rZf9bS47gzUApcY2ZvEYTJ3Unt6UewlZFnZj/c6zdEZDcUApJxzOyfCDqAPUNwEe47geOBYjP7ysw2mdkWM9tmZhXA18AL1O+arL8hCJe5BJ3FNgPzzaxd+Hpj3X2ZmZ0NLCS4FuxQDy6Kvo7g7J/vA2+a2RnVYeDuZQRf+q8AR4XPe5uZtQmfazrwXwRnGT1sZlc39H0SAXRRGck84VlBQ929qMbyAwgOvjpQRfDL3wjOkqtKOpMHM5tD8IX+51qePxu4DrgM+G93Hx+ePfQTggukFxKEyh3hc1TVePz+4boLCTqz7SQ43nAcwS6j24FKgq2OXOACgj4Oj4ePv4zg1NUh7j5n394lkYBCQKQJmFmuu+9xrB8zy6oOCDMbDsx39y9r3OdoYGvNjmFm1sPdP27ksiWGFAIiIjGmYwIiIjGmEBARiTGFgIhIjCkERERiTCEgIhJjCgERkRj7/653nobJrtqOAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "#根据rss绘制曲线，以直观观察残差平方和的变化趋势\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "# 以下 font.family 设置仅适用于 Mac系统，其它系统请使用对应字体名称\n",
    "matplotlib.rcParams['font.family'] = 'Arial Unicode MS'\n",
    "plt.plot(range(10000),rss[0:10000],'-',c='black',linewidth=3)\n",
    "plt.xlabel(\"迭代次数\",fontsize=15)\n",
    "plt.ylabel(\"RSS\",fontsize=15)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "现使用Python实现预测函数gbrt_predict，即可对新数据进行预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gbrt_predict(x,gml_list,shrinkage):\n",
    "    \"\"\"\n",
    "    建立预测函数，对新数据进行预测\n",
    "    x : 进行预测的新数据\n",
    "    gml_list : 即GBRT的模型列表\n",
    "    shrinkage : 训练模型时，指定的shrinkage参数\n",
    "    \"\"\"\n",
    "    f0 = gml_list[0]\n",
    "    for i in range(1,len(gml_list)):\n",
    "        f0 = f0 + shrinkage*gml_list[i].predict(x)\n",
    "    return f0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "对测试数据集x_test，调用预测函数gbrt_predict，根据预测结果和真实结果，分析预测效果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.3384897597030645"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sum((y_test - gbrt_predict(x_test,model_list,0.0005))**2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "为说明该方法对预测建模提升的效果，现直接使用回归决策树基于训练集x_train,y_train建模，并对测试数据集x_test进行预测，同时分析预测效果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.5676935145052517"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf = tree.DecisionTreeRegressor(min_samples_leaf=30)\n",
    "clf = clf.fit(x_train, y_train)\n",
    "np.sum((y_test - clf.predict(x_test))**2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
